Part 1: PCA with Penguins

# Build the PCA from four numeric measurements: body mass plus the three
# columns whose names end in "_mm".
# Clean first: keep only those columns and drop any row with a missing value.
penguin_measurements <- drop_na(
  select(penguins, body_mass_g, ends_with("_mm"))
)

# Standardise the columns with scale(), then run principal components
# analysis. The result is a prcomp object (a list), no longer a data frame.
penguin_pca <- prcomp(scale(penguin_measurements))

# A prcomp object is a list; its `rotation` element holds the loadings of
# each variable on each principal component.
penguin_pca[["rotation"]]
##                          PC1         PC2        PC3        PC4
## body_mass_g        0.5483502 0.084362920 -0.5966001 -0.5798821
## bill_length_mm     0.4552503 0.597031143  0.6443012 -0.1455231
## bill_depth_mm     -0.4003347 0.797766572 -0.4184272  0.1679860
## flipper_length_mm  0.5760133 0.002282201 -0.2320840  0.7837987
# Rebuild the rows that went into the PCA while keeping every column (such as
# species), so autoplot() can map plot aesthetics to them. The same four
# measurement columns are checked for NA, so the rows line up with penguin_pca.
penguin_complete <- drop_na(penguins, body_mass_g, ends_with("_mm"))

# Draw a biplot. autoplot() chooses the plot type from the object it is given;
# for this PCA result that is a biplot. The first two principal components
# shown here capture roughly 90% of the variance in the data.
autoplot(
  penguin_pca,
  data = penguin_complete,  # supplies the species column used for colour
  colour = "species",
  loadings = TRUE,          # draw the variable loadings...
  loadings.label = TRUE     # ...and label them
) +
  theme_bw()
## Warning: `select_()` is deprecated as of dplyr 0.7.0.
## Please use `select()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

# What is this plot telling us? It shows correlations between variables and helps identify major clusters and patterns among the species.
# Body mass and flipper length are positively correlated.
# Flipper length and bill depth are negatively correlated.

## Part 2: ggplot2 customization & reading in different file types

Read in an .xlsx file and do some wrangling

# Read the landings spreadsheet and wrangle it.
fish_noaa <- here("data", "foss_landings.xlsx") %>%
  read_excel() %>%
  clean_names() %>%
  mutate(
    # Lower-case every character column.
    across(where(is.character), tolower),
    # Keep everything up to the 4th-from-last character, i.e. drop the final
    # three characters of each name.
    # NOTE(review): an earlier comment said "last 4 characters"; confirm the
    # intended suffix length against the data.
    nmfs_name = str_sub(nmfs_name, end = -4)
  ) %>%
  # Values were lower-cased above, so match against lowercase "public".
  filter(confidentiality == "public")

Make a customized graph:

# Line chart of pounds by year, with one coloured line per nmfs_name.
# The legend is switched off for the line layer.
fish_plot <- ggplot(fish_noaa, aes(x = year, y = pounds)) +
  geom_line(mapping = aes(colour = nmfs_name), show.legend = FALSE) +
  theme_minimal()

# Print the static version of the plot.
fish_plot
## Warning: Removed 6 row(s) containing missing values (geom_path).

# Convert the ggplot object into an interactive plotly graphic.
ggplotly(p = fish_plot)